# -*- coding: utf-8 -*-
import json

import scrapy
from henan.rules import *
from scrapy import Request
from scrapy.exceptions import IgnoreRequest
from scrapy.utils.project import get_project_settings

from henan.utils.order_captcha import OrderCaptchaHelper
from zc_core.middlewares.proxies.proxy_facade import ProxyFacade
from zc_core.model.items import Box
from zc_core.util.http_util import retry_request
from zc_core.spiders.base import BaseSpider

class OrderSpider(BaseSpider):
    """Spider that walks the paged sale-article ("order") list of the
    Henan portal, fetching a fresh proxy and captcha code for every page.
    """

    name = 'order'
    # Override project settings: crawl strictly serially so the
    # proxy/captcha pair obtained for a page is still valid when the
    # request is actually sent.
    custom_settings = {
        'CONCURRENT_REQUESTS': 1,
        'DOWNLOAD_DELAY': 0.1,
        'CONCURRENT_REQUESTS_PER_DOMAIN': 1,
        'CONCURRENT_REQUESTS_PER_IP': 1,
        'DOWNLOADER_MIDDLEWARES': {
            # 'henan.middlewares.OrderCaptchaMiddleware': 610,
            # 'henan.middlewares.SessionMiddleware': 655,
            # 'zc_core.middlewares.proxy.ProxyMiddleware': 650,
            # 'zc_core.middlewares.agent.UserAgentMiddleware': 640,
            # 'henan.validator.BizValidator': 500
        },
        'PROXY_SCORE_LIMIT': 0
    }
    # Frequently used URLs
    # order_list_url = 'http://222.143.21.205:8081/policy/saleArticleList?categoryId=&pageNo={}&pageSize=20&title='
    index_url = 'http://222.143.21.205:8081/'
    order_list_url = 'http://222.143.21.205:8081/policy/saleArticleList?code={}'

    def __init__(self, batchNo=None, *args, **kwargs):
        super(OrderSpider, self).__init__(batchNo=batchNo, *args, **kwargs)
        settings = get_project_settings()
        # Upper bound on how many list pages to crawl in one run.
        self.max_page = settings.get('MAX_ORDER_PAGE', 2000)

        self.captcha_helper = OrderCaptchaHelper()
        self.proxy_facade = ProxyFacade()

    def _send_request(self, page, code, proxy):
        """Build the POST request for one list page.

        :param page: 1-based page number to fetch.
        :param code: captcha code, embedded in the URL query string.
        :param proxy: proxy URL, forwarded to the downloader via meta.
        :return: a configured ``scrapy.Request``.
        """
        # NOTE(review): the body is JSON-encoded while the Content-Type
        # header claims x-www-form-urlencoded; the server apparently
        # accepts this — confirm before changing either side.
        return scrapy.Request(
                    method='POST',
                    url=self.order_list_url.format(code),
                    body=json.dumps({
                        'categoryId': '3',
                        'pageNo': str(page),
                        'pageSize': '20',
                        'officeName': '',
                        'title': '',
                    }),
                    headers={
                        'Host': '222.143.21.205:8081',
                        'Connection': 'keep-alive',
                        'Cache-Control': 'max-age=0',
                        'Origin': 'http://222.143.21.205:8081',
                        'Upgrade-Insecure-Requests': '1',
                        'Content-Type': 'application/x-www-form-urlencoded',
                        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3741.400 QQBrowser/10.5.3863.400',
                        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
                        'Referer': 'http://222.143.21.205:8081/policy/saleArticleList',
                        'Accept-Encoding': 'gzip, deflate',
                        'Accept-Language': 'zh-CN,zh;q=0.9',
                    },
                    meta={
                        'reqType': 'order',
                        'page': str(page),
                        # BUG FIX: carry the captcha code in meta —
                        # parse_order reads meta['code'] for logging, but
                        # it was never stored, so it always logged None.
                        'code': code,
                        'proxy': proxy
                    },
                    callback=self.parse_order,
                    errback=self.error_back,
                )

    def start_requests(self):
        """Kick off the crawl with page 1 (skipped if no captcha code)."""
        proxy = self.proxy_facade.get_proxy()
        code = self.captcha_helper.get_captcha_code(proxy)
        if code:
            yield self._send_request(1, code, proxy)

    def parse_order(self, response):
        """Parse one list page: emit a Box of orders and, while under
        ``max_page``, schedule the next page with a fresh proxy/captcha.
        """
        meta = response.meta
        # BUG FIX: _send_request stores the page number as a string;
        # coerce it back to int so the `<=` comparison and `+ 1` below
        # work (str <= int raises TypeError on Python 3).
        page = int(meta.get('page', 1))
        code = meta.get('code')
        proxy = meta.get('proxy', 'NO_PROXY')
        # `parse_order` here resolves to the module-level parser pulled in
        # via `from henan.rules import *` — it shadows this method's name.
        order_list = parse_order(response)
        if order_list:
            self.logger.info('列表: page=%s, count=%s, proxy=%s, code=%s' % (page, len(order_list), proxy, code))
            yield Box('order', self.batch_no, order_list)

            if page and page <= self.max_page:
                proxy = self.proxy_facade.get_proxy()
                code = self.captcha_helper.get_captcha_code(proxy)
                if code:
                    next_page = page + 1
                    yield self._send_request(next_page, code, proxy)
        else:
            self.logger.info('列表为空: page=%s, proxy=%s, code=%s' % (page, proxy, code))
